Visualize Location Analysis for Yelp Businesses in Pittsburgh Area (Beta Version)

library(ggmap)
## Warning: package 'ggmap' was built under R version 3.4.4
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.4
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(ggplot2)
library(htmlwidgets)
## Warning: package 'htmlwidgets' was built under R version 3.4.4
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:ggmap':
## 
##     inset
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.4.4
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)
## Warning: package 'tibble' was built under R version 3.4.3
library(shiny)
## Warning: package 'shiny' was built under R version 3.4.4
library(leaflet.extras)
## Warning: package 'leaflet.extras' was built under R version 3.4.4
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:shiny':
## 
##     validate
yelp <- stream_in(file("business.json"))
## opening file input connection.
## 
 Found 500 records...
 Found 1000 records...
 Found 1500 records...
 Found 2000 records...
 Found 2500 records...
 Found 3000 records...
 Found 3500 records...
 Found 4000 records...
 Found 4500 records...
 Found 5000 records...
 Found 5500 records...
 Found 6000 records...
 Found 6500 records...
 Found 7000 records...
 Found 7017 records...
 Imported 7017 records. Simplifying...
## closing file input connection.
yelp_flat <- flatten(yelp)
yelp_data <- as_data_frame(yelp_flat)
yelp_data <- yelp_data[which(yelp_data$review_count>=50),]
yelp_data[yelp_data==""]<-NA

drop <- c("hours.Monday","hours.Tuesday","hours.Wednesday","hours.Thursday","hours.Friday","hours.Saturday","hours.Sunday","attributes.AcceptsInsurance","attributes.Open24Hours","attributes.AgesAllowed","attributes.BYOB","attributes.HairSpecializesIn","attributes.Corkage","attributes.DriveThru","attributes.ByAppointmentOnly","attributes.BYOBCorkage","attributes.DogsAllowed","attributes.Smoking","attributes.CoatCheck","attributes.BusinessParking","attributes.Ambience","attributes.GoodForMeal","attributes.Music","attributes.BestNights","attributes.WheelchairAccessible","attributes.BusinessAcceptsBitcoin","attributes.GoodForDancing","attributes.RestaurantsAttire","attributes.NoiseLevel","attributes.Alcohol","attributes.HappyHour","attributes.RestaurantsTakeOut","attributes.RestaurantsDelivery","attributes.RestaurantsReservations","attributes.BusinessAcceptsCreditCards","attributes.RestaurantsGoodForGroups","attributes.GoodForKids","attributes.HasTV","attributes.OutdoorSeating","attributes.WiFi","attributes.RestaurantsTableService","attributes.Caters","attributes.BikeParking")
yelp_data <- yelp_data[,!(names(yelp_data) %in% drop)]
sort(colSums(is.na(yelp_data)),decreasing = T)
## attributes.RestaurantsPriceRange2                           address 
##                                55                                 8 
##                       business_id                              name 
##                                 0                                 0 
##                              city                             state 
##                                 0                                 0 
##                       postal_code                          latitude 
##                                 0                                 0 
##                         longitude                             stars 
##                                 0                                 0 
##                      review_count                           is_open 
##                                 0                                 0 
##                        categories 
##                                 0
completeFun <- function(data, desiredCols) {
  completeVec <- complete.cases(data[, desiredCols])
  return(data[completeVec, ])
}
yelp_data <- completeFun(yelp_data, "attributes.RestaurantsPriceRange2")
yelp_data <- completeFun(yelp_data, "address")
biz <- yelp_data
head(biz)
## # A tibble: 6 x 13
##   business_id name  address city  state postal_code latitude longitude
##   <chr>       <chr> <chr>   <chr> <chr> <chr>          <dbl>     <dbl>
## 1 dQj5DLZjeD… Apte… 4606 P… Pitt… PA    15224           40.5     -79.9
## 2 v-scZMU6jh… No. … 436 Ma… Pitt… PA    15222           40.4     -80.0
## 3 CU047MuCn7… Carn… 4400 F… Pitt… PA    15213           40.4     -80.0
## 4 F6c3D1o9Z4… Hell… 701 Pr… Pitt… PA    15237           40.6     -80.0
## 5 JrrY4v21k7… Kell… 6012 P… Pitt… PA    15206           40.5     -79.9
## 6 sMzNLdhJZG… Sien… 942 Pe… Pitt… PA    15222           40.4     -80.0
## # ... with 5 more variables: stars <dbl>, review_count <int>,
## #   is_open <int>, categories <chr>,
## #   attributes.RestaurantsPriceRange2 <chr>
register_google(key = "your-key")
qmap(location = "pittsburgh") 
## Source : https://maps.googleapis.com/maps/api/staticmap?center=pittsburgh&zoom=10&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Warning in strptime(x, fmt, tz = "GMT"): unknown timezone 'zone/tz/2019a.
## 1.0/zoneinfo/America/New_York'
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=pittsburgh&key=xxx

ggmap(
  ggmap = get_map(
     c(lon = mean(biz$longitude), lat = mean(biz$latitude)),
     zoom = 11, 
     scale = "auto",
     maptype = "terrain", #"terrain", "terrain-background", "satellite", "roadmap", and "hybrid" (google maps), "terrain", "watercolor", and "toner"
     source = "google"),
  extent = "device",
  legend = "topright"
  ) + geom_point(data = biz, aes(x = biz$longitude, y = biz$latitude), alpha = 0.5, color = "red") + theme(legend.position = "right") + 
  labs(
    x = "Longitude", 
    y = "Latitude", 
    title = "Yelp - Pittsburgh")
## Source : https://maps.googleapis.com/maps/api/staticmap?center=40.446394,-79.981998&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Warning: Removed 5 rows containing missing values (geom_point).

Assumption: the potential merchant wants to start up a restaurant with

- more than average of stars (3.76),

- review count (145) and

- more than median of price range (2).

Pull yelp sites with more than average stars in Pittsburgh

# Pull yelp sites with more than average stars in Pittsburgh
avg <- mean(biz$stars) 
pittTopStarFrame = biz %>% filter(stars > avg) # 543
## Warning: package 'bindrcpp' was built under R version 3.4.4
content <- paste("Name:", pittTopStarFrame$name, "<br>",
                 "Address:", pittTopStarFrame$address, "<br>",
                 "Postal Code:", pittTopStarFrame$postal_code, "<br>",
                 "Categories:", pittTopStarFrame$categories, "<br>",
                 "Star:", pittTopStarFrame$stars)
pal <- colorNumeric(
  palette = "YlOrRd", #https://www.r-bloggers.com/palettes-in-r/
  domain = pittTopStarFrame$stars
)

leaflet(pittTopStarFrame) %>% addTiles() %>% 
  setView(lng = mean(pittTopStarFrame$longitude), lat = mean(pittTopStarFrame$latitude), zoom = 11) %>% 
  addCircleMarkers(lng = ~pittTopStarFrame$longitude, lat = ~pittTopStarFrame$latitude, clusterOptions = markerClusterOptions(), popup = ~content)

Pull yelp sites with more than average review count in Pittsburgh

# Pull yelp sites with more than average review count in Pittsburgh
avg1 <- mean(biz$review_count) 
pittTopReviewCountFrame = biz %>% filter(review_count > avg1) # 297
content <- paste("Name:", pittTopReviewCountFrame$name, "<br>",
                 "Address:", pittTopReviewCountFrame$address, "<br>",
                 "Postal Code:", pittTopReviewCountFrame$postal_code, "<br>",
                 "Categories:", pittTopReviewCountFrame$categories, "<br>",
                 "Review Count:", pittTopReviewCountFrame$review_count)
pal <- colorNumeric(
  palette = "YlOrRd", #https://www.r-bloggers.com/palettes-in-r/
  domain = pittTopReviewCountFrame$review_count
)

leaflet(pittTopReviewCountFrame) %>% addTiles() %>% 
  setView(lng = mean(pittTopReviewCountFrame$longitude), lat = mean(pittTopReviewCountFrame$latitude), zoom = 11) %>% 
  addCircleMarkers(lng = ~pittTopReviewCountFrame$longitude, lat = ~pittTopReviewCountFrame$latitude, clusterOptions = markerClusterOptions(), popup = ~content)

Pull yelp sites with more than median price range in Pittsburgh

# Pull yelp sites with more than median price range in Pittsburgh
med <- median(as.numeric(biz$attributes.RestaurantsPriceRange2))
pittTopPriceRangeFrame = biz %>% filter(attributes.RestaurantsPriceRange2 > med) # 297
content <- paste("Name:", pittTopPriceRangeFrame$name, "<br>",
                 "Address:", pittTopPriceRangeFrame$address, "<br>",
                 "Postal Code:", pittTopPriceRangeFrame$postal_code, "<br>",
                 "Categories:", pittTopPriceRangeFrame$categories, "<br>",
                 "Price Range:", pittTopPriceRangeFrame$attributes.RestaurantsPriceRange2)
pal <- colorNumeric(
  palette = "YlOrRd", #https://www.r-bloggers.com/palettes-in-r/
  domain = as.numeric(pittTopPriceRangeFrame$attributes.RestaurantsPriceRange2)
)

leaflet(pittTopPriceRangeFrame) %>% addTiles() %>% 
  setView(lng = mean(pittTopPriceRangeFrame$longitude), lat = mean(pittTopPriceRangeFrame$latitude), zoom = 11) %>% 
  addCircleMarkers(lng = ~pittTopPriceRangeFrame$longitude, lat = ~pittTopPriceRangeFrame$latitude, clusterOptions = markerClusterOptions(), popup = ~content)

Combination of the Scenarios shown above

content_stars <- paste("Name:", pittTopStarFrame$name, "<br>",
                 "Address:", pittTopStarFrame$address, "<br>",
                 "Postal Code:", pittTopStarFrame$postal_code, "<br>",
                 "Categories:", pittTopStarFrame$categories, "<br>",
                 "Review Count:", pittTopStarFrame$review_count, "<br>",
                 "Price Range:", pittTopStarFrame$attributes.RestaurantsPriceRange2, "<br>",
                 "Star:", pittTopStarFrame$stars)

content_review_count <- paste("Name:", pittTopReviewCountFrame$name, "<br>",
                 "Address:", pittTopReviewCountFrame$address, "<br>",
                 "Postal Code:", pittTopReviewCountFrame$postal_code, "<br>",
                 "Categories:", pittTopReviewCountFrame$categories, "<br>",
                 "Review Count:", pittTopReviewCountFrame$review_count, "<br>",
                 "Price Range:", pittTopReviewCountFrame$attributes.RestaurantsPriceRange2, "<br>",
                 "Star:", pittTopReviewCountFrame$stars)

content_price_range <- paste("Name:", pittTopPriceRangeFrame$name, "<br>",
                 "Address:", pittTopPriceRangeFrame$address, "<br>",
                 "Postal Code:", pittTopPriceRangeFrame$postal_code, "<br>",
                 "Categories:", pittTopPriceRangeFrame$categories, "<br>",
                 "Review Count:", pittTopPriceRangeFrame$review_count, "<br>",
                 "Price Range:", pittTopPriceRangeFrame$attributes.RestaurantsPriceRange2, "<br>",
                 "Star:", pittTopPriceRangeFrame$stars)

pal <- colorNumeric(
  palette = "YlOrRd", #https://www.r-bloggers.com/palettes-in-r/
  domain = as.numeric(biz$attributes.RestaurantsPriceRange2)
)

leaflet() %>% addTiles() %>% 
  setView(lng = mean(biz$longitude), lat = mean(biz$latitude), zoom = 11) %>% 
  addProviderTiles("Esri.WorldImagery", group="background 1") %>%
  addTiles(options = providerTileOptions(noWrap = TRUE), group="background 2") %>%
  addCircleMarkers(data=pittTopStarFrame, lng = ~pittTopStarFrame$longitude, lat = ~pittTopStarFrame$latitude, clusterOptions = markerClusterOptions(), popup = ~content_stars, group="star") %>%
  addCircleMarkers(data=pittTopReviewCountFrame, lng = ~pittTopReviewCountFrame$longitude, lat = ~pittTopReviewCountFrame$latitude, clusterOptions = markerClusterOptions(), popup = ~content_review_count, group="review_count") %>%
  addCircleMarkers(data=pittTopPriceRangeFrame, lng = ~pittTopPriceRangeFrame$longitude, lat = ~pittTopPriceRangeFrame$latitude, clusterOptions = markerClusterOptions(), popup = ~content_price_range, group="price_range") %>%
  addLayersControl(overlayGroups = c("star","review_count","price_range") , baseGroups = c("background 1","background 2"), options = layersControlOptions(collapsed = FALSE))

Search for Name, Zip Code and Categories to help potential merchants understand the competitors

content <- paste("Name:", biz$name, "<br>",
                 "Address:", biz$address, "<br>",
                 "Postal Code:", biz$postal_code, "<br>",
                 "Categories:", biz$categories, "<br>",
                 "Price Range:", biz$attributes.RestaurantsPriceRange2, "<br>",
                 "Review Count:", biz$review_count, "<br>",
                 "Star:", biz$stars)

leaflet(biz) %>% addProviderTiles(providers$OpenStreetMap) %>%
  addCircles(data=biz, lng = ~biz$longitude, lat = ~biz$latitude, weight = 1, fillOpacity = 0.5, 
             popup = ~content, label = ~biz$name, group = "name") %>%  
  addCircles(data=biz, lng = ~biz$longitude, lat = ~biz$latitude, weight = 1, fillOpacity = 0.5, 
             popup = ~content, label = ~biz$postal_code, group = "zip") %>%
  addCircles(data=biz, lng = ~biz$longitude, lat = ~biz$latitude, weight = 1, fillOpacity = 0.5, 
             popup = ~content, label = ~biz$categories, group = "cat") %>%  
  addResetMapButton() %>%
  addSearchFeatures(targetGroups = c("name","zip","cat"),options = searchFeaturesOptions(zoom = 12, openPopup = TRUE, firstTipSubmit = TRUE, autoCollapse = TRUE, hideMarkerOnCollapse = TRUE )) %>%
  addControl("<P><B>Hint!</B> Search for ...<br/><ul><li>Name of Restaurant</li><li>Zip Code</li><li>Categories</li></ul></P>",position = "bottomright")

Statistical Analysis along with Visualization (Popularity Analysis)

pal <- colorNumeric(
  palette = "YlOrRd", #https://www.r-bloggers.com/palettes-in-r/
  domain = pittTopStarFrame$stars
)

server <- function(input, output) {
    # create a reactive value that will store the click position
    data_of_click <- reactiveValues(clickedMarker=NULL)

    # Leaflet map with 2 markers
    output$map <- renderLeaflet({

      leaflet() %>%
        addProviderTiles(providers$Esri.WorldStreetMap) %>%
        addResetMapButton() %>%
        addSearchOSM() %>%
        addSearchGoogle() %>%
        addSearchUSCensusBureau()
    
      # Reverse Geocoding using OSM
      leaflet()  %>%
        addProviderTiles(providers$OpenStreetMap) %>%
        addResetMapButton() %>%
        addReverseSearchOSM()
      
      leaflet(biz) %>% addTiles() %>% 
          setView(lng = mean(biz$longitude), lat = mean(biz$latitude), zoom = 11) %>% 
          addProviderTiles("Esri.WorldImagery", group="background 1") %>%
          addTiles(options = providerTileOptions(noWrap = TRUE), group="background 2") %>%
          addCircleMarkers(data=pittTopStarFrame, lng = ~pittTopStarFrame$longitude, lat = ~pittTopStarFrame$latitude, clusterOptions = markerClusterOptions(), popup = ~content_stars, group="star") %>%
          addCircleMarkers(data=pittTopReviewCountFrame, lng = ~pittTopReviewCountFrame$longitude, lat = ~pittTopReviewCountFrame$latitude, clusterOptions = markerClusterOptions(), popup = ~content_review_count, group="review_count") %>%
          addCircleMarkers(data=pittTopPriceRangeFrame, lng = ~pittTopPriceRangeFrame$longitude, lat = ~pittTopPriceRangeFrame$latitude, clusterOptions = markerClusterOptions(), popup = ~content_price_range, group="price_range") %>%
          addLayersControl(overlayGroups = c("star","review_count","price_range") , baseGroups = c("background 1","background 2"), options = layersControlOptions(collapsed = FALSE))
      })

    # store the click
    observeEvent(input$map_marker_click,{
    data_of_click$clickedMarker <- input$map_marker_click
    })

    # Make a barplot depending of the selected point
    output$plot=renderPlot({
      my_place=data_of_click$clickedMarker$id
      barplot(rnorm(10), col=rgb(0.1,0.4,0.9,0.3))
    })
}

ui <- fluidPage(
    br(),
    column(8,leafletOutput("map", height="600px")),
    column(4,br(),br(),br(),br(),plotOutput("plot", height="300px")),
    br()
)

shinyApp(ui = ui, server = server)
Shiny applications not supported in static R Markdown documents
library(rsconnect)
## Warning: package 'rsconnect' was built under R version 3.4.4
## 
## Attaching package: 'rsconnect'
## The following object is masked from 'package:shiny':
## 
##     serverInfo
rsconnect::deployApp('Yelp_Visualize.Rmd')
## Discovering document dependencies...
## OK
## Preparing to deploy document...DONE
## Uploading bundle for document: 870846...DONE
## Deploying bundle: 2017327 for document: 870846 ...
## Waiting for task: 605583539
##   building: Parsing manifest
##   building: Fetching packages
##   building: Installing packages
##   building: Installing files
##   building: Pushing image: 2121683
##   deploying: Starting instances
##   rollforward: Activating new instances
##   success: Stopping old instances
## Document successfully deployed to https://zigman.shinyapps.io/yelp_visualize/

To Be Continued …